Import gene count data and create DESeqDataSet

# Import gene expression / count data
counts <- read.table("data/tagseq/processed/counts_newpars.txt", 
                     header = TRUE, row.names = 1)
# Trim column names to just the sample name
colnames(counts) <- str_sub(colnames(counts), 1, 7)
# Order columns by sample name
counts <- counts[, order(colnames(counts))]

# Import sample metadata
sdat <- read_xlsx("data/sample_metadata.xlsx") %>%
  mutate(sample = paste0(species, colony, ".", core),
         group = paste(trt1, trt2, sep = ""),
         colony = factor(colony)) %>%
  mutate_if(is.character, as.factor) %>%
  arrange(sample) %>%                              # order rows by sample name
  column_to_rownames(var = "sample")               # set sample name to rownames

Import gene annotations

trinotate <- readxl::read_xlsx("data/genome/Mcavernosa_trinotate_annotation_report.xlsx",
                                sheet = "Mcavernosa_trinotate_annotation")

Create DESeqDataSet

# Create full DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = counts,
                              colData = sdat,
                              design = ~ colony)

# Subset DESeqDataSet
# (Only colonies 20, 22, and 26 were in repeat heat stress exp)
# (Only time point at end of heat stress)
#hs.dds <- dds[, !is.na(colData(dds)$symtrt)]
hs.dds <- dds[, colData(dds)$date_sampled == "2017-10-28" & colData(dds)$colony %in% c(20, 22, 26)]
#hs.dds <- dds

# Add design formula for heat stress experiment and DROP UNUSED FACTOR LEVELS!
design(hs.dds) <- formula(~ colony + group)
colData(hs.dds) <- droplevels(colData(hs.dds))

QC/pre-filter count data

# Read in count totals of raw, postQC, and mapped reads
count_summ <- read.table("data/tagseq/processed/count_totals_newpars.txt") %>%
  mutate(sample_id = str_sub(V1, 3, 7),
         raw = V2, post_qc = V4, mapped = V6) %>%
  select(sample_id, raw, post_qc, mapped) %>%
  gather(stage, count, -sample_id) %>%
  separate(sample_id, into = c("colony", "core"))

# Summarize counts for only samples included in paper (samples 2017-10-28)
filter(sdat, date_sampled == "2017-10-28") %>%
  left_join(count_summ) %>%
  group_by(stage) %>%
  summarize(total = sum(count),
            min = min(count),
            max = max(count),
            median = median(count))
## # A tibble: 3 x 5
##   stage       total    min      max   median
##   <chr>       <int>  <int>    <int>    <dbl>
## 1 mapped    5857638  14339  1230318   82032 
## 2 post_qc  39958845 317414  4481999  581370.
## 3 raw     177141789 577272 54489606 2569899
# How many genes detected among all samples 2017-10-28
dds1 <- dds[, colData(dds)$date_sampled == "2017-10-28"]
assay1 <- assay(dds1)[rowSums(assay(dds1)) > 0, ]
dim(assay1)
## [1] 18722    48
# Number of samples
nsamples <- ncol(counts(hs.dds))
# Number of reads per sample
rps <- qplot(colSums(counts(hs.dds))) +
  labs(x = "Reads per sample", y = "Number of samples",
       title = "Read counts per sample") +
  geom_label(aes(x = 6e5, y = 10, label = paste(nsamples, "samples")))
# Sample Mc20-05 has far more counts than others -- may need to rarefy this sample down to median count?

# Number of genes
# Remove genes with counted zero times across entire heat stress experiment dataset
#hs.dds <- hs.dds[ rowSums(counts(hs.dds)) > 0, ]

# Remove genes with less than 2 mean count across samples
hs.dds <- hs.dds[ rowMeans(counts(hs.dds)) > 1, ]
ngenes <- nrow(counts(hs.dds))
# Number of reads per gene
rpg <- qplot(log10(rowSums(counts(hs.dds)))) +
  labs(x = "Reads per gene", y = "Number of genes",
       title = "Read counts per gene") +
  geom_label(aes(x = 4, y = 750, label = paste(ngenes, "genes")))

plot_grid(rps, rpg)

# FILTER OUT SAMPLE THAT DIDN'T SHUFFLE WHEN IT WAS SUPPOSED TO -- Mc22.12 (17th sample in hs.dds)
hs.dds <- hs.dds[, -17]

# Normalize expression data for visualization purposes using VST tranformation
vsd <- vst(hs.dds, blind = FALSE)

Visualize samples in multivariate space

Principal Component Analysis

(based on normalized gene counts) ** Wright et al. also perform PCoA only including DEGs, and of course this shows more clustering by group… ** Could also consider performing PCoA only including genes with <50% residual variance?

vsdf <- vsd[, colData(vsd)$colony %in% c(20, 22, 26) & colData(vsd)$group %in% c("bc", "bh", "cc", "ch")]

## Run PCA and return data to visualize with ggplot
pcaData <- plotPCA(vsdf, intgroup = c("colony", "group"), returnData = TRUE)
pcaData <- pcaData %>%
  mutate(group.1 = recode(group.1, bc = "D - control", bh = "D - heated", 
                          cc = "C - control", ch = "C - heated"))
percentVar <- round(100 * attr(pcaData, "percentVar"))

## Plot PCA
ggplot(pcaData, aes(x = PC1, y = PC2, color = colony, shape = group.1)) +
  geom_point() +
  scale_shape_manual(values = c(21,24,16,17)) +
  labs(title =  "PCA: all genes") +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance"))

Principal Coordinate Analysis

(based on sample distances)

## Calculate Euclidean distances among samples
sampleDists <- dist(t(assay(vsdf)), method = "manhattan")
sampleDistMatrix <- as.matrix( sampleDists )

## Plot NMDS
mds <- as.data.frame(colData(vsdf)) %>%
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(group = recode(group, bc = "D - control", bh = "D - heated", 
                          cc = "C - control", ch = "C - heated"))

#mdsf <- mds %>% filter(group %in% c("ca", "ch", "ba", "bh"))

ggplot(mds, aes(x = `1`, y = `2`, color = colony, shape = group)) +
    geom_point() +# coord_fixed() +
    scale_shape_manual(values = c(21,24,16,17)) +
    labs(x = "PC1", y = "PC2")# +

    geom_text(label = paste(mds$colony, mds$core))
## geom_text: parse = FALSE, check_overlap = FALSE, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
# Plot with spiders instead of hulls
  # tidy solution to get centroids for ggplot below
mds <- mds %>%
  group_by(colony, trt1, trt2) %>%
  summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%
  full_join(mds)

ggplot(mds, aes(color = colony, shape = trt1, fill = trt2)) +
    geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2),
                 lwd = 0.25, col = "grey") +
    geom_point(aes(x = c1, y = c2, shape = trt1), size = 5, alpha = 1) +
    geom_point(aes(x = `1`, y = `2`, shape = trt1), alpha = 1) +# coord_fixed() +
    scale_shape_manual(values = c(24,21)) +
    #scale_fill_manual(values = c("#F8766D", "white", "#00BA38", "white", "#619CFF", "white")) +
    scale_fill_manual(values = c("grey", "white")) +
    labs(x = "PC1", y = "PC2")# +

    geom_text(aes(x = `1`, y = `2`), label = paste(mds$colony, mds$core))
## mapping: x = ~`1`, y = ~`2` 
## geom_text: parse = FALSE, check_overlap = FALSE, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
  • Both PCA and NMDS show that samples are most clearly differentiated by colony
  • Colony 26 (and colony 20 but less-so and in opp. dir.) is separated by symbiont (or past treatment)
  • Colony 22 separates a little by heat treatment, other colonies do not

Partition total variance among explanatory factors

# optional step to run analysis in parallel on multicore machines
# Here use 8 threads
# This is strongly recommended since the analysis
# can be computationally intensive
cl <- makeCluster(8)
registerDoParallel(cl)

# Get normalized read counts for hs.dds / heat stress experiment
# This is just for the 36 samples in the heat stress experiment -- 3 colonies
ncounts <- assay(vsd)   # normalized counts, previously filtered to remove genes with total count < 3
# Specify variables to consider
form <- ~ (1|colony) + (1|trt1) + (1|trt2) + (1|colony:trt1) + (1|colony:trt2) + (1|trt1:trt2) + (1|colony:trt1:trt2)
# Get sample metadata
info <- data.frame(colData(hs.dds))

# Fit model and extract results
# Interpretation: the variance explained by each variables after correcting for all other variables
#varPart <- fitExtractVarPartModel(ncounts, form, info)
varPart <- fitExtractVarPartModel(ncounts, form, info)
## 
## Finished...
## Total: 105 s
stopCluster(cl)

# sort variables (i.e. columns) by median fraction of variance explained
vp <- sortCols( varPart )
topgenes <- vp %>%
  mutate(gene = rownames(.)) %>%
  gather(key, value, -gene) %>%
  group_by(key) %>%
  filter(value == max(value), key != "Residuals")

pvp <- plotVarPart(vp, label.angle = 60, outlier.size = NA)

pvp + geom_text(data = topgenes, aes(x = key, y = value * 100, label = gene),
                nudge_y = 5, size = 3)

vp %>% summarize_all(median) %>% knitr::kable(caption = "Median variance explained for all genes")
Median variance explained for all genes
colony:trt1 colony colony:trt1:trt2 colony:trt2 trt1 trt1:trt2 trt2 Residuals
0.012656 0 0 0 0 0 0 0.8696836
# This plot shows that for most genes, most variance is residual/unexplained.


# Sum of variance explained? Relative comparison of total explanatory power of each?
vp %>% 
  summarise_all(sum) %>%
  gather() %>%
  filter(key != "Residuals") %>%
  ggplot(aes(x = key, y = value, fill = key)) +
    geom_col(position = "stack") +
    labs(x = "", y = "Relative total variance explained")

Partition total variance among explanatory factors for each colony

# Get sample data for heat stress experiment into tibble
coldata <- sdat %>%
  rownames_to_column("sample") %>%
  as_tibble() %>%
  select(sample, colony, trt1, trt2, date_sampled, group)
# Gather raw counts into tibble
colcounts <- counts %>%
  rownames_to_column("gene") %>%
  as_tibble() %>%
  gather(key = "sample", value = "count", -gene)
# Join count data with sample data, and nest vs count data by colony
all <- full_join(coldata, colcounts) %>%
  filter(colony %in% c(20, 22, 26), date_sampled == "2017-10-28") %>%
  nest(-colony) 

# Create DESeqDataSet for each colony, filter genes mean count > 2 within each colony, and vst transform
all <- all %>%
  mutate(sdat = map(data, ~ data.frame(column_to_rownames(distinct(., sample, trt1, trt2, group), "sample"))),
         counts = map(data, ~ spread(select(., sample, gene, count), sample, count)),
         counts = map(counts, ~ as.matrix(column_to_rownames(., "gene"))),
         dds = map2(counts, sdat, ~ DESeqDataSetFromMatrix(countData = .x, colData = .y, design = ~ group)),
         ddsf = map(dds, function(x) x[rowMedians(counts(x)) > 1, ]),  # Filter DESeqDataSet
         vsd = map(ddsf, ~ vst(., blind = FALSE)))  # Variance Stabilizing Transformation

# Run variance partition analysis for each colony
cl <- makeCluster(8); registerDoParallel(cl)
all <- all %>%
  mutate(vp = map2(vsd, sdat, ~ fitExtractVarPartModel(
    exprObj = assay(.x), data = .y, formula = ~ (1|trt1) + (1|trt2) + (1|trt1:trt2)
    )))
## 
## Finished...
## Total: 70 s
## 
## Finished...
## Total: 36 s
## 
## Finished...
## Total: 39 s
stopCluster(cl)

# Calculate sum of variance explained by each source across all genes for each colony
all <- all %>%
  mutate(vpsum = map(vp, ~ summarise_all(., sum)))

# Plot
all %>% 
  unnest(vpsum, .drop = T) %>%
  gather(source, value, -colony) %>%
  group_by(colony) %>%
  mutate(relvar = value / sum(value)) %>%   # TOTAL VARIANCE NOT SAME FOR EACH COLONY -- NORMALIZE WITHIN COLONY
  filter(source != "Residuals") %>%
  ggplot(aes(x = source, y = relvar, fill = source)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ colony) +
  labs(x = "", y = "Relative total variance explained")

all %>% 
  unnest(vpsum, .drop = T) %>%
  gather(source, value, -colony) %>%
  #filter(source != "Residuals") %>%
  ggplot(aes(x = colony, y = value, fill = source)) +
  geom_bar(stat = "identity", position = "fill") +
  labs(x = "", y = "Relative total variance explained")

Colony 20 DE

# DE testing for individual colonies since colony:trt1 interaction biggest explainer of variance
col20dds <- dds[, colData(dds)$colony == 20 & colData(dds)$date_sampled == "2017-10-28"]
# Add design formula for heat stress experiment and DROP UNUSED FACTOR LEVELS!
design(col20dds) <- formula(~ trt1)
colData(col20dds) <- droplevels(colData(col20dds))
# Remove genes with less than 2 mean count across samples
col20dds <- col20dds[ rowMeans(counts(col20dds)) > 2, ]
# Normalize expression data for visualization purposes using VST tranformation
col20vsd <- vst(col20dds, blind = FALSE)

## Run PCA and return data to visualize with ggplot
pcaData <- plotPCA(col20vsd, intgroup = "group", returnData = TRUE)
pcaData <- pcaData %>%
  mutate(group.1 = recode(group.1, bc = "D - control", bh = "D - heated", 
                          cc = "C - control", ch = "C - heated"))
percentVar <- round(100 * attr(pcaData, "percentVar"))

## Plot PCA
ggplot(pcaData, aes(x = PC1, y = PC2, shape = group.1)) +
  geom_point() +
  scale_shape_manual(values = c(21,24,16,17)) +
  labs(title =  "PCA: all genes") +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance"))

## DE testing
col20dds <- DESeq(col20dds)
col20.Dctrl.Cctrl <- results(col20dds, contrast = c("trt1", "b", "c"))
# Subset DE genes with adjusted p-value < 0.1
col20.Dctrl.Cctrl.sig <- col20.Dctrl.Cctrl[which(col20.Dctrl.Cctrl$padj < 0.1 ), ]
# Get names of DE genes
col20.Dctrl.Cctrl.DEgenes <- rownames(col20.Dctrl.Cctrl.sig)
# Summarize DE
as.data.frame(col20.Dctrl.Cctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
24 23 1
plotCounts(col20dds, gene = "Mcavernosa32279", intgroup = "group")

Colony 22 DE

# DE testing for individual colonies since colony:trt1 interaction biggest explainer of variance
col22dds <- dds[, colData(dds)$colony == 22 & colData(dds)$date_sampled == "2017-10-28"]
rownames(colData(col22dds))  # remove 5th sample -- 22-12 didn't shuffle
##  [1] "Mc22.01" "Mc22.03" "Mc22.08" "Mc22.09" "Mc22.12" "Mc22.13" "Mc22.16"
##  [8] "Mc22.18" "Mc22.20" "Mc22.23" "Mc22.24" "Mc22.27"
col22dds <- col22dds[, -5]
# Add design formula for heat stress experiment and DROP UNUSED FACTOR LEVELS!
design(col22dds) <- formula(~ trt1)
colData(col22dds) <- droplevels(colData(col22dds))
# Remove genes with less than 2 mean count across samples
col22dds <- col22dds[ rowMeans(counts(col22dds)) > 2, ]
# Normalize expression data for visualization purposes using VST tranformation
col22vsd <- vst(col22dds, blind = FALSE)

## Run PCA and return data to visualize with ggplot
pcaData <- plotPCA(col22vsd, intgroup = "group", returnData = TRUE)
pcaData <- pcaData %>%
  mutate(group.1 = recode(group.1, bc = "D - control", bh = "D - heated", 
                          cc = "C - control", ch = "C - heated"))
percentVar <- round(100 * attr(pcaData, "percentVar"))

## Plot PCA
ggplot(pcaData, aes(x = PC1, y = PC2, shape = group.1)) +
  geom_point() +
  scale_shape_manual(values = c(21,24,16,17)) +
  labs(title =  "PCA: all genes") +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance"))

## DE testing
col22dds <- DESeq(col22dds)
col22.Dctrl.Cctrl <- results(col22dds, contrast = c("trt1", "b", "c"))
# Subset DE genes with adjusted p-value < 0.1
col22.Dctrl.Cctrl.sig <- col22.Dctrl.Cctrl[which(col22.Dctrl.Cctrl$padj < 0.1 ), ]
# Get names of DE genes
col22.Dctrl.Cctrl.DEgenes <- rownames(col22.Dctrl.Cctrl.sig)
# Summarize DE
as.data.frame(col22.Dctrl.Cctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
0 0 0
intersect(col20.Dctrl.Cctrl.DEgenes, col22.Dctrl.Cctrl.DEgenes)
## character(0)

Col 26 DE

# DE testing for individual colonies since colony:trt1 interaction biggest explainer of variance
col26dds <- dds[, colData(dds)$colony == 26 & colData(dds)$date_sampled == "2017-10-28"]
# Add design formula for heat stress experiment and DROP UNUSED FACTOR LEVELS!
design(col26dds) <- formula(~ trt1)
colData(col26dds) <- droplevels(colData(col26dds))
# Remove genes with less than 2 mean count across samples
col26dds <- col26dds[ rowMeans(counts(col26dds)) > 2, ]
# Normalize expression data for visualization purposes using VST tranformation
col26vsd <- vst(col26dds, blind = FALSE)

## Run PCA and return data to visualize with ggplot
pcaData <- plotPCA(col26vsd, intgroup = "group", returnData = TRUE)
pcaData <- pcaData %>%
  mutate(group.1 = recode(group.1, bc = "D - control", bh = "D - heated", 
                          cc = "C - control", ch = "C - heated"))
percentVar <- round(100 * attr(pcaData, "percentVar"))

## Plot PCA
ggplot(pcaData, aes(x = PC1, y = PC2, shape = group.1)) +
  geom_point() +
  scale_shape_manual(values = c(21,24,16,17)) +
  labs(title =  "PCA: all genes") +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance"))

## DE testing
col26dds <- DESeq(col26dds)
col26.Dctrl.Cctrl <- results(col26dds, contrast = c("trt1", "b", "c"))
# Subset DE genes with adjusted p-value < 0.1
col26.Dctrl.Cctrl.sig <- col26.Dctrl.Cctrl[which(col26.Dctrl.Cctrl$padj < 0.1 ), ]
# Get names of DE genes
col26.Dctrl.Cctrl.DEgenes <- rownames(col26.Dctrl.Cctrl.sig)
# Summarize DE
as.data.frame(col26.Dctrl.Cctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
624 439 185
# data.frame(col26.Dctrl.Cctrl) %>% 
#   rownames_to_column("gene") %>%
#   filter(padj < 0.1) %>%
#   arrange(logP) %>%
#   head(.)
# 
# plotCounts(col26dds, gene = "Mcavernosa25729", intgroup = "group")
# plotCounts(col26dds, gene = "Mcavernosa20412", intgroup = "group")
# plotCounts(col26dds, gene = "Mcavernosa23531", intgroup = "group")
# plotCounts(col26dds, gene = "Mcavernosa12472", intgroup = "group")
# plotCounts(col26dds, gene = "Mcavernosa08787", intgroup = "group")
# 
# 
# plotCounts(col26dds, gene = "Mcavernosa01063", intgroup = "group")   # Actitoxin
# plotCounts(col26dds, gene = "Mcavernosa22057", intgroup = "group")  # May be an organic anion pump relevant to cellular detoxification.
# plotCounts(col26dds, gene = "Mcavernosa04800", intgroup = "group")
# plotCounts(col26dds, gene = "Mcavernosa06989", intgroup = "group")
# 
# plotCounts(col26dds, gene = "Mcavernosa15521", intgroup = "group")
# 
# plotCounts(col26dds, gene = "Mcavernosa03615", intgroup = "trt1")   # Oxidative stress gene. up in col20D, down in col26D
# 
# plotCounts(col26dds, gene = "Mcavernosa15942", intgroup = "group")

Wow, colony 26 has tons of DE genes based on having C vs. D

What are they?

# Generate logP values
col26.Dctrl.Cctrl$logP <- -log10(col26.Dctrl.Cctrl$pvalue) * sign(col26.Dctrl.Cctrl$log2FoldChange)

# Write to file
write.table(data.frame(gene=rownames(col26.Dctrl.Cctrl), logP=col26.Dctrl.Cctrl$logP), 
            file = "code/GO_MWU/col26.Dctrl.Cctrl.logP.txt",
            row.names = FALSE, quote = FALSE, sep = ",")

library(KOGMWU)  # install.packages("KOGMWU")

gene2kog1 <- read.table("data/genome/Mcavernosa_iso2kog.tab", sep = "\t") # Downloaded from Studivan github
gene2kog <- gene2kog1

gene2kog <- gene2kog %>% 
  filter(!V2 == "") #%>%
  # filter(!V2 %in% lowcounts$V2)


col26.Dctrl.Cctrl.logP <- read.table("code/GO_MWU/col26.Dctrl.Cctrl.logP.txt", sep = ",", skip = 1)
col26.Dctrl.Cctrl.kog <- kog.mwu(col26.Dctrl.Cctrl.logP, gene2kog)
## [1] "Continuous measure of interest: will perform MWU test"
col26.Dctrl.Cctrl.kog <- col26.Dctrl.Cctrl.kog %>%
  mutate(delta.rank = ifelse(nseqs < 10, 0, delta.rank))

kogtable <- makeDeltaRanksTable(list("col26.Dctrl.Cctrl" = col26.Dctrl.Cctrl.kog)) %>%
  rownames_to_column('term') %>%
  arrange(col26.Dctrl.Cctrl) %>%
  column_to_rownames('term')


col26.Dctrl.Cctrl.kog.pval <- arrange(col26.Dctrl.Cctrl.kog, match(term, rownames(kogtable)))

kogpvaltable <- tibble(
  term = rownames(kogtable),
  col26.Dctrl.Cctrl = stars.pval(col26.Dctrl.Cctrl.kog.pval$padj)
)
kogpvaltable
## # A tibble: 23 x 2
##    term                                                    col26.Dctrl.Cct…
##    <chr>                                                   <chr>           
##  1 Coenzyme transport and metabolism                       .               
##  2 Carbohydrate transport and metabolism                   *               
##  3 Cell cycle control, cell division, chromosome partitio… " "             
##  4 Nucleotide transport and metabolism                     " "             
##  5 Lipid transport and metabolism                          " "             
##  6 Energy production and conversion                        " "             
##  7 Signal transduction mechanisms                          " "             
##  8 Replication, recombination and repair                   " "             
##  9 Chromatin structure and dynamics                        " "             
## 10 Secondary metabolites biosynthesis, transport and cata… " "             
## # … with 13 more rows
#kogtable

# customizing color palette for the heatmap
paletteLength <- 100
myColor <- colorRampPalette(rev(c("chocolate1","#FEE090","grey10", "cyan3","cyan")))(paletteLength)
#myColor <- colorRampPalette(rev(c(brewer.pal(n = 7, name = "RdYlBu"),"darkblue","darkblue")))(100)
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(min(as.matrix(kogtable)), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(as.matrix(kogtable))/paletteLength, max(as.matrix(kogtable)),
                  length.out=floor(paletteLength/2)))

# Plot the heatmap
#pheatmap(as.matrix(kogtable), color=myColor, breaks=myBreaks)

pheatmap(as.matrix(kogtable),
         #clustering_distance_cols="correlation",
         cluster_cols = F,
         cluster_rows = F,
         color=myColor, breaks = myBreaks,
         treeheight_row=15,
         treeheight_col=15,
         border_color="white",
         scale = "none",
         display_numbers = kogpvaltable[,2])

# remove colony effect for downstream visualization 
#vsd <- vst(hs.dds, blind = TRUE)
#vsd <- rlog(hs.dds, blind = TRUE)
## but note DE testing will still use the un-corrected normalized count data
vsd2 <- vsd

assay(vsd2) <- limma::removeBatchEffect(assay(vsd), vsd$colony)
#assay(vsd2) <- limma::removeBatchEffect(assay(vsd2), vsd2$tank)
#assay(vsd2) <- limma::removeBatchEffect(assay(vsd), interaction(vsd$colony, vsd$trt1))

Differential expression testing

Run DESeq pipeline

hs.dds <- DESeq(hs.dds)

D vs. C corals at ambient temperature

# Test for differential expression between D and C corals in the control treatment
#Dctrl.Cctrl <- results(hs.dds, contrast = c("symtrt", "D.ctrl", "C.ctrl"))
Dctrl.Cctrl <- results(hs.dds, contrast = c("group", "bc", "cc"))

# Subset DE genes with adjusted p-value < 0.1
Dctrl.Cctrl.sig <- Dctrl.Cctrl[which(Dctrl.Cctrl$padj < 0.1 ), ]
Dctrl.Cctrl.p05 <- Dctrl.Cctrl[which(Dctrl.Cctrl$pvalue < 0.05), ]

# Get names of DE genes
Dctrl.Cctrl.DEgenes <- rownames(Dctrl.Cctrl.sig)
# Mcavernosa08323: Peroxiredoxin-5 -- protection against oxidative stress

# Summarize DE
as.data.frame(Dctrl.Cctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
30 10 20
Make a heatmap
# Make a heatmap
phm(sig=Dctrl.Cctrl.sig, vsd=vsd2, grp1="bc", grp2="cc", main = "",
    color = colorRampPalette(rev(c("chocolate1","#FEE090","grey10", "cyan3","cyan")))(100),
    border_color = NA, annotation_colors = ann_colors, annotation_legend = FALSE,
    cellwidth = 8.6, cellheight = 1.6, fontsize_row = 4, fontsize_col = 6, fontsize = 4,
    scale = "row")

GO enrichment of DE genes
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis

# Generate logP values
Dctrl.Cctrl$logP <- -log10(Dctrl.Cctrl$pvalue) * sign(Dctrl.Cctrl$log2FoldChange)

# Write to file

write.table(data.frame(gene=rownames(Dctrl.Cctrl), logP=Dctrl.Cctrl$logP), 
            file = "code/GO_MWU/Dctrl.Cctrl.logP.txt",
            row.names = FALSE, quote = FALSE, sep = ",")
# Run GO_MWU
setwd("code/GO_MWU")

commandArgs <- function(...) c("Dctrl.Cctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 15  GO terms at 10% FDR
commandArgs <- function(...) c("Dctrl.Cctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 5  GO terms at 10% FDR
commandArgs <- function(...) c("Dctrl.Cctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 10  GO terms at 10% FDR
# Plot GO_MWU results
setwd("code/GO_MWU")

#png(filename = "../../figures/Dctrl.Cctrl.DE.GO.png", width = 140, height = 140, units = "mm", res = 300)
par(xpd = NA)
resultsBP <- gomwuPlot(inFile = "Dctrl.Cctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "BP",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.0, treeHeight = 0.6)

## GO terms dispayed:  15 
## "Good genes" accounted for:  56 out of 185 ( 30% )
#dev.off()
# save text representation of results to file
# save(resultsBP, file = "Dctrl.Cctrl.GO_MWU.results.RData")
#resultsBP[with(resultsBP, order(direction, pval)), ]

resultsMF <- gomwuPlot(inFile = "Dctrl.Cctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "MF",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.0, treeHeight = 0.6)

## GO terms dispayed:  5 
## "Good genes" accounted for:  15 out of 156 ( 10% )
resultsCC <- gomwuPlot(inFile = "Dctrl.Cctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "CC",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.0, treeHeight = 0.6)

## GO terms dispayed:  10 
## "Good genes" accounted for:  16 out of 149 ( 11% )

Red GO terms are significantly upregulated in D vs. C corals at ambient temp. Blue terms are significantly downregulated.

Annotations of DE genes

Dctrl.Cctrl.anno <- data.frame(Dctrl.Cctrl.sig) %>%
  mutate(transcript_id = rownames(.)) %>%
  left_join(trinotate) %>%
  select(`#gene_id`, everything()) %>%
  arrange(sign(log2FoldChange), padj)

write.table(Dctrl.Cctrl.anno, file = "output/Dctrl.Cctrl.anno.csv", sep = ",", row.names = F)

D vs. C corals under heat stress

# Test for differences between D and C corals in the heated treatment
Dheat.Cheat <- results(hs.dds, contrast=c("group", "bh", "ch"))

# Subset DE genes with adjusted p-value < 0.1
Dheat.Cheat.sig <- Dheat.Cheat[which(Dheat.Cheat$padj < 0.1 ), ]
Dheat.Cheat.p05 <- Dheat.Cheat[which(Dheat.Cheat$pvalue < 0.05), ]

# Get names of DE genes
Dheat.Cheat.DEgenes <- rownames(Dheat.Cheat.sig)

# Summarize DE
as.data.frame(Dheat.Cheat) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
0 0 0
# Make a heatmap
#phm(Dheat.Cheat.sig, vsd2, "trt", "heat", "sym")
#Dheat.Cheat.sig
#plotCounts(hs.dds, gene = "Mcavernosa00931", intgroup = c("group"))
#plotCounts(hs.dds, gene = "Mcavernosa24482", intgroup = c("sym", "trt"))
#plotCounts(hs.dds, gene = "Mcavernosa02675", intgroup = c("sym", "trt"))

phm(sig=Dheat.Cheat.sig, vsd=vsd2, grp1="bh", grp2="ch", main = "",
    color = colorRampPalette(rev(c("chocolate1","#FEE090","grey10", "cyan3","cyan")))(100),
    border_color = NA, annotation_colors = ann_colors, annotation_legend = FALSE,
    cellwidth = 8.6, cellheight = 4.2, fontsize_row = 4, fontsize_col = 6, fontsize = 4,
    scale = "row")

Only one gene is differentially expressed between D and C corals under heat stress. Not enough genes to run a GO enrichment analysis.

New: 14 genes, all upregulated in D corals compared to C corals

NOW: ZERO GENES

## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis

# Generate logP values
Dheat.Cheat$logP <- -log10(Dheat.Cheat$pvalue) * sign(Dheat.Cheat$log2FoldChange)

# Write to file

write.table(data.frame(gene=rownames(Dheat.Cheat), logP=Dheat.Cheat$logP), 
            file = "code/GO_MWU/Dheat.Cheat.logP.txt",
            row.names = FALSE, quote = FALSE, sep = ",")
# Run GO_MWU
setwd("code/GO_MWU")

commandArgs <- function(...) c("Dheat.Cheat.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 0  GO terms at 10% FDR
commandArgs <- function(...) c("Dheat.Cheat.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 0  GO terms at 10% FDR
commandArgs <- function(...) c("Dheat.Cheat.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 0  GO terms at 10% FDR
No GO terms are enriched in this contrast.

Annotations of DE genes

Dheat.Cheat.anno <- data.frame(Dheat.Cheat.sig) %>%
  mutate(transcript_id = rownames(.)) %>%
  left_join(trinotate) %>%
  select(`#gene_id`, everything()) %>%
  arrange(sign(log2FoldChange), padj)

write.table(Dheat.Cheat.anno, file = "output/Dheat.Cheat.anno.csv", sep = ",", row.names = F)

Heat vs. control in C corals

# Test for differences between C corals in the control vs. heated treatment
Cheat.Cctrl <- results(hs.dds, contrast=c("group", "ch", "cc"))

# Subset DE genes with adjusted p-value < 0.1
Cheat.Cctrl.sig <- Cheat.Cctrl[which(Cheat.Cctrl$padj < 0.1 ), ]
Cheat.Cctrl.p05 <- Cheat.Cctrl[which(Cheat.Cctrl$pvalue < 0.05), ]

# Get names of DE genes
Cheat.Cctrl.DEgenes <- rownames(Cheat.Cctrl.sig)

# Summarize DE
as.data.frame(Cheat.Cctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
81 38 43
# Make a heatmap
# Specify colors
ann_colors = list(
    sym = c(C = "blue", D = "red"),
    trt = c(heat = "red", ctrl = "yellow")
)

#png(filename = "figures/Cheat.Cctrl.DE.heatmap.png", width = 80, height = 120, unit = "mm", res = 300)
phm(Cheat.Cctrl.sig, vsd2, "ch", "cc", main = "",
    color = colorRampPalette(rev(c("chocolate1","#FEE090","grey10", "cyan3","cyan")))(100),
    border_color = NA, annotation_colors = ann_colors, annotation_legend = FALSE,
    cellwidth = 8.6, cellheight = 1.2, fontsize_row = 4, fontsize_col = 6, fontsize = 4,
    scale = "row")

#dev.off()

#phm(Cheat.Cctrl.sig, vsd2, "sym", "C", "trt")   # use vsd2 for the normcounts with batch (colony) effect removed
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis

# Generate logP values
Cheat.Cctrl$logP <- -log10(Cheat.Cctrl$pvalue) * sign(Cheat.Cctrl$log2FoldChange)

# Write to file

write.table(data.frame(gene=rownames(Cheat.Cctrl), logP=Cheat.Cctrl$logP), 
            file = "code/GO_MWU/Cheat.Cctrl.logP.txt",
            row.names = FALSE, quote = FALSE, sep = ",")
# Run GO_MWU
setwd("code/GO_MWU")

commandArgs <- function(...) c("Cheat.Cctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 35  GO terms at 10% FDR
commandArgs <- function(...) c("Cheat.Cctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 18  GO terms at 10% FDR
commandArgs <- function(...) c("Cheat.Cctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 7  GO terms at 10% FDR
# Plot GO_MWU results
setwd("code/GO_MWU")

#png(filename = "../../figures/Cheat.Cctrl.DE.GO.png", width = 140, height = 140, units = "mm", res = 300)
par(xpd = NA)
results <- gomwuPlot(inFile = "Cheat.Cctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "BP",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.2, treeHeight = 0.5)

## GO terms dispayed:  35 
## "Good genes" accounted for:  97 out of 177 ( 55% )
#dev.off()
# save text representation of results to file
# save(results, file = "Cheat.Cctrl.GO_MWU.results.RData")

resultsMF <- gomwuPlot(inFile = "Cheat.Cctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "MF",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.0, treeHeight = 0.6)

## GO terms dispayed:  18 
## "Good genes" accounted for:  45 out of 156 ( 29% )
resultsCC <- gomwuPlot(inFile = "Cheat.Cctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "CC",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.0, treeHeight = 0.6)

## GO terms dispayed:  7 
## "Good genes" accounted for:  18 out of 153 ( 12% )

Red GO terms are significantly upregulated in heated vs. control C corals. Blue terms are significantly downregulated.

Annotations of DE genes

Cheat.Cctrl.anno <- data.frame(Cheat.Cctrl.sig) %>%
  mutate(transcript_id = rownames(.)) %>%
  left_join(trinotate) %>%
  select(`#gene_id`, everything()) %>%
  arrange(sign(log2FoldChange), padj)

write.table(Cheat.Cctrl.anno, file = "output/Cheat.Cctrl.anno.csv", sep = ",", row.names = F)

Heat vs. control in D corals

# Test for differences between D corals in the control vs. heated treatment
Dheat.Dctrl <- results(hs.dds, contrast=c("group", "bh", "bc"))

# Subset DE genes with adjusted p-value < 0.1
Dheat.Dctrl.sig <- Dheat.Dctrl[which(Dheat.Dctrl$padj < 0.1 ), ]
Dheat.Dctrl.p05 <- Dheat.Dctrl[which(Dheat.Dctrl$pvalue < 0.05), ]

# Get names of DE genes
Dheat.Dctrl.DEgenes <- rownames(Dheat.Dctrl.sig)

# Summarize DE
as.data.frame(Dheat.Dctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
27 19 8
# Make a heatmap
#png(filename = "figures/Dheat.Dctrl.DE.heatmap.png", width = 80, height = 60, unit = "mm", res = 300)
phm(Dheat.Dctrl.sig, vsd2, "bh", "bc", main = "",
    color = colorRampPalette(rev(c("chocolate1","#FEE090","grey10", "cyan3","cyan")))(100),
    border_color = NA, annotation_colors = ann_colors, annotation_legend = FALSE,
    cellwidth = 8.6, cellheight = 1.2, fontsize_row = 4, fontsize_col = 6, fontsize = 4,
    scale = "row")

#dev.off()

#phm(Dheat.Dctrl.sig, vsd2, "sym", "D", "trt")
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis

# Generate logP values
Dheat.Dctrl$logP <- -log10(Dheat.Dctrl$pvalue) * sign(Dheat.Dctrl$log2FoldChange)

# Write to file

write.table(data.frame(gene=rownames(Dheat.Dctrl), logP=Dheat.Dctrl$logP), 
            file = "code/GO_MWU/Dheat.Dctrl.logP.txt",
            row.names = FALSE, quote = FALSE, sep = ",")
setwd("code/GO_MWU")

# Run GO_MWU
commandArgs <- function(...) c("Dheat.Dctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 3  GO terms at 10% FDR
commandArgs <- function(...) c("Dheat.Dctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 12  GO terms at 10% FDR
commandArgs <- function(...) c("Dheat.Dctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 6  GO terms at 10% FDR
# Plot GO_MWU results
setwd("code/GO_MWU")

#png(filename = "../../figures/Dheat.Dctrl.DE.GO.png", width = 140, height = 60, units = "mm", res = 300)
par(xpd = NA)
results <- gomwuPlot(inFile = "Dheat.Dctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "BP",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.2, treeHeight = 0.5)

## GO terms dispayed:  3 
## "Good genes" accounted for:  4 out of 128 ( 3% )
#dev.off()
 # save text representation of results to file
# save(results, file = "Dheat.Dctrl.GO_MWU.results.RData")

resultsMF <- gomwuPlot(inFile = "Dheat.Dctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "MF",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.0, treeHeight = 0.6)

## GO terms dispayed:  12 
## "Good genes" accounted for:  14 out of 116 ( 12% )
resultsCC <- gomwuPlot(inFile = "Dheat.Dctrl.logP.txt",
                     goAnnotations = "Mcavernosa_gene2go.txt",
                     goDivision = "CC",
                     level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.0, treeHeight = 0.6)

## GO terms dispayed:  6 
## "Good genes" accounted for:  7 out of 105 ( 7% )

Red GO terms are significantly upregulated in heated vs. control D corals. Blue terms are significantly downregulated.

Annotations of DE genes

Dheat.Dctrl.anno <- data.frame(Dheat.Dctrl.sig) %>%
  mutate(transcript_id = rownames(.)) %>%
  left_join(trinotate) %>%
  select(`#gene_id`, everything()) %>%
  arrange(sign(log2FoldChange), padj)

write.table(Dheat.Dctrl.anno, file = "output/Dheat.Dctrl.anno.csv", sep = ",", row.names = F)

Other contrasts to use for all-DGE-PCoA later

# Test for differences between C corals in the control vs. heated treatment
Cheat.Dctrl <- results(hs.dds, contrast=c("group", "ch", "bc"))

# Subset DE genes with adjusted p-value < 0.1
Cheat.Dctrl.sig <- Cheat.Dctrl[which(Cheat.Dctrl$padj < 0.1 ), ]
Cheat.Dctrl.p05 <- Cheat.Dctrl[which(Cheat.Dctrl$pvalue < 0.05), ]

# Get names of DE genes
Cheat.Dctrl.DEgenes <- rownames(Cheat.Dctrl.sig)

# Summarize DE
as.data.frame(Cheat.Dctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
22 12 10
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis

# Generate logP values
Cheat.Dctrl$logP <- -log10(Cheat.Dctrl$pvalue) * sign(Cheat.Dctrl$log2FoldChange)

# Write to file

write.table(data.frame(gene=rownames(Cheat.Dctrl), logP=Cheat.Dctrl$logP), 
            file = "code/GO_MWU/Cheat.Dctrl.logP.txt",
            row.names = FALSE, quote = FALSE, sep = ",")
# Test for differences between C corals in the control vs. heated treatment
Dheat.Cctrl <- results(hs.dds, contrast=c("group", "bh", "cc"))

# Subset DE genes with adjusted p-value < 0.1
Dheat.Cctrl.sig <- Dheat.Cctrl[which(Dheat.Cctrl$padj < 0.1 ), ]
Dheat.Cctrl.p05 <- Dheat.Cctrl[which(Dheat.Cctrl$pvalue < 0.05), ]

# Get names of DE genes
Dheat.Cctrl.DEgenes <- rownames(Dheat.Cctrl.sig)

# Summarize DE
as.data.frame(Dheat.Cctrl) %>%
  filter(padj < 0.1) %>%
  summarise(total = n(),
            up = sum(log2FoldChange > 0),
            down = sum(log2FoldChange < 0)) %>%
  knitr::kable(caption = "Differential expression")
Differential expression
total up down
42 23 19
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis

# Generate logP values
Dheat.Cctrl$logP <- -log10(Dheat.Cctrl$pvalue) * sign(Dheat.Cctrl$log2FoldChange)

# Write to file

write.table(data.frame(gene=rownames(Dheat.Cctrl), logP=Dheat.Cctrl$logP), 
            file = "code/GO_MWU/Dheat.Cctrl.logP.txt",
            row.names = FALSE, quote = FALSE, sep = ",")

KOG

library(KOGMWU)  # install.packages("KOGMWU")

gene2kog1 <- read.table("data/genome/Mcavernosa_iso2kog.tab", sep = "\t") # Downloaded from Studivan github
#gene2kog2 <- read.table("~/Downloads/Mcav_genome\ 2/Mcavernosa_annotation/Mcavernosa_gene2kogClass.tab", sep = "\t") # Made from Mcav_genome on Dropbox using commands in z0on emapper_to_GOMWU_KOGMWU repo readme
### These two gene2kog files are different and I don't know which one is right to use. They produce different results.
# ddd <- full_join(gene2kog1, gene2kog2, by = "V1")
# head(ddd)
# dim(gene2kog1)
# dim(gene2kog2)
# 
# length(intersect(gene2kog1$V1, gene2kog2$V1))

# 
# setdiff(gene2kog1$V1, gene2kog2$V1)
# setdiff(gene2kog2$V1, gene2kog1$V1)
# gene2kog1 %>%
#   filter(V1 == "Mcavernosa00617")

# library(Biostrings)
# transcripts <- readDNAStringSet("data/genome/Mcavernosa.maker.transcripts.fasta")
# transcript_names <- gsub(pattern="-RA.*$", replacement = "", names(transcripts))
# transcript_names
# length(transcript_names)
# table(gene2kog1$V1 %in% transcript_names)
# table(gene2kog2$V1 %in% transcript_names)

gene2kog <- gene2kog1

# lowcounts <- gene2kog %>%
#   count(V2) %>%
#   filter(n < 100) %>%
#   select(V2)
  
gene2kog <- gene2kog %>% 
  filter(!V2 == "") #%>%
  # filter(!V2 %in% lowcounts$V2)


Dctrl.Cctrl.logP <- read.table("code/GO_MWU/Dctrl.Cctrl.logP.txt", sep = ",", skip = 1)
Dctrl.Cctrl.kog <- kog.mwu(Dctrl.Cctrl.logP, gene2kog)
## [1] "Continuous measure of interest: will perform MWU test"
Dctrl.Cctrl.l2fc <- data.frame(V1 = rownames(Dctrl.Cctrl), V2 = Dctrl.Cctrl$log2FoldChange)
Dctrl.Cctrl.kog.l2fc <- kog.mwu(Dctrl.Cctrl.l2fc, gene2kog)
## [1] "Continuous measure of interest: will perform MWU test"
Dctrl.Cctrl.kog <- Dctrl.Cctrl.kog %>%
  mutate(delta.rank = ifelse(nseqs < 10, 0, delta.rank))

Cheat.Cctrl.logP <- read.table("code/GO_MWU/Cheat.Cctrl.logP.txt", sep = ",", skip = 1)
Cheat.Cctrl.kog <- kog.mwu(Cheat.Cctrl.logP, gene2kog)
## [1] "Continuous measure of interest: will perform MWU test"
Cheat.Cctrl.kog <- Cheat.Cctrl.kog %>%
  mutate(delta.rank = ifelse(nseqs < 10, 0, delta.rank))

Dheat.Dctrl.logP <- read.table("code/GO_MWU/Dheat.Dctrl.logP.txt", sep = ",", skip = 1)
Dheat.Dctrl.kog <- kog.mwu(Dheat.Dctrl.logP, gene2kog)
## [1] "Continuous measure of interest: will perform MWU test"
Dheat.Dctrl.kog <- Dheat.Dctrl.kog %>%
  mutate(delta.rank = ifelse(nseqs < 10, 0, delta.rank))

Dheat.Cheat.logP <- read.table("code/GO_MWU/Dheat.Cheat.logP.txt", sep = ",", skip = 1)
Dheat.Cheat.kog <- kog.mwu(Dheat.Cheat.logP, gene2kog)
## [1] "Continuous measure of interest: will perform MWU test"
kogtable <- makeDeltaRanksTable(list("Dctrl.Cctrl" = Dctrl.Cctrl.kog,
                                     "Cheat.Cctrl" = Cheat.Cctrl.kog,
                                     "Dheat.Dctrl" = Dheat.Dctrl.kog))

Dctrl.Cctrl.kog.pval <- arrange(Dctrl.Cctrl.kog, match(term, rownames(kogtable)))
Cheat.Cctrl.kog.pval <- arrange(Cheat.Cctrl.kog, match(term, rownames(kogtable)))
Dheat.Dctrl.kog.pval <- arrange(Dheat.Dctrl.kog, match(term, rownames(kogtable)))
kogpvaltable <- tibble(
  term = rownames(kogtable),
  Dctrl.Cctrl = stars.pval(Dctrl.Cctrl.kog.pval$padj),
  Cheat.Cctrl = stars.pval(Cheat.Cctrl.kog.pval$padj),
  Dheat.Dctrl = stars.pval(Dheat.Dctrl.kog.pval$padj)
)
kogpvaltable
## # A tibble: 23 x 4
##    term                                 Dctrl.Cctrl Cheat.Cctrl Dheat.Dctrl
##    <chr>                                <chr>       <chr>       <chr>      
##  1 Translation, ribosomal structure an… ***         ***         ***        
##  2 Lipid transport and metabolism       *           **          " "        
##  3 Extracellular structures             *           **          " "        
##  4 Replication, recombination and repa… *           " "         " "        
##  5 Intracellular trafficking, secretio… *           " "         " "        
##  6 Carbohydrate transport and metaboli… .           **          " "        
##  7 Chromatin structure and dynamics     " "         " "         " "        
##  8 Inorganic ion transport and metabol… " "         ***         " "        
##  9 Energy production and conversion     " "         " "         " "        
## 10 Signal transduction mechanisms       " "         " "         " "        
## # … with 13 more rows
#kogtable

# customizing color palette for the heatmap
paletteLength <- 100
myColor <- colorRampPalette(rev(c("chocolate1","#FEE090","grey10", "cyan3","cyan")))(paletteLength)
#myColor <- colorRampPalette(rev(c(brewer.pal(n = 7, name = "RdYlBu"),"darkblue","darkblue")))(100)
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(min(as.matrix(kogtable)), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(as.matrix(kogtable))/paletteLength, max(as.matrix(kogtable)),
                  length.out=floor(paletteLength/2)))

# Plot the heatmap
#pheatmap(as.matrix(kogtable), color=myColor, breaks=myBreaks)

pheatmap(as.matrix(kogtable),
         #clustering_distance_cols="correlation",
         cluster_cols = F,
         color=myColor, breaks = myBreaks,
         treeheight_row=15,
         treeheight_col=15,
         border_color="white",
         scale = "none",
         display_numbers = kogpvaltable[,2:4])

Overlap in DEGs

dc.cc <- rownames(counts(hs.dds)) %in% Dctrl.Cctrl.DEgenes
dh.ch <- rownames(counts(hs.dds)) %in% Dheat.Cheat.DEgenes
ch.cc <- rownames(counts(hs.dds)) %in% Cheat.Cctrl.DEgenes
dh.dc <- rownames(counts(hs.dds)) %in% Dheat.Dctrl.DEgenes



a <- cbind(ch.cc, dh.dc)
vennDiagram(vennCounts(a),
            circle.col = c("blue", "orange"))

# 
a <- cbind(dh.dc, dc.cc, ch.cc)
vennDiagram(vennCounts(a))

# 
# a <- cbind(dc.cc, ch.cc)
# vennDiagram(vennCounts(a))


# genes upreg in D vs. C control   AND    C heat vs. control
# Does D 'frontload' coral heat response?

a <- cbind(dc.cc, ch.cc)
vennDiagram(vennCounts(a))

intersect(Dctrl.Cctrl.DEgenes, Cheat.Cctrl.DEgenes)
## [1] "Mcavernosa00734" "Mcavernosa00920" "Mcavernosa01063" "Mcavernosa04519"
## [5] "Mcavernosa08291" "Mcavernosa08323" "Mcavernosa15354" "Mcavernosa15521"
## [9] "Mcavernosa25626"
Dctrl.Cctrl["Mcavernosa24519",]
## log2 fold change (MLE): group bc vs cc 
## Wald test p-value: group bc vs cc 
## DataFrame with 1 row and 7 columns
##                         baseMean    log2FoldChange            lfcSE
##                        <numeric>         <numeric>        <numeric>
## Mcavernosa24519 42.0239402866443 -3.15451028596462 1.01375435037366
##                             stat              pvalue              padj
##                        <numeric>           <numeric>         <numeric>
## Mcavernosa24519 -3.1117107263726 0.00186006667217117 0.124417792960783
##                              logP
##                         <numeric>
## Mcavernosa24519 -2.73047148866537
Cheat.Cctrl["Mcavernosa24519",]
## log2 fold change (MLE): group ch vs cc 
## Wald test p-value: group ch vs cc 
## DataFrame with 1 row and 7 columns
##                         baseMean   log2FoldChange            lfcSE
##                        <numeric>        <numeric>        <numeric>
## Mcavernosa24519 42.0239402866443 -3.7251838906498 1.01881593512236
##                              stat               pvalue               padj
##                         <numeric>            <numeric>          <numeric>
## Mcavernosa24519 -3.65638557685341 0.000255796511875242 0.0212311104856451
##                              logP
##                         <numeric>
## Mcavernosa24519 -3.59210538199835
# "Mcavernosa00334" -- RNA processing

# "Mcavernosa00734" -- acid phosphatase (endosomal maturation in Downs et al. 2009)

# "Mcavernosa00920" -- Probable medium-chain specific acyl-CoA dehydrogenase, mitochondrial;

# "Mcavernosa04519" -- copper transporting ATPase

# "Mcavernosa08291" --# aka ENTR1 - endosome-associated-trafficking regulator 1
# without it, TNF receptor expression is reduced, resulting in resistance to TNF-induced apoptosis
# and also protein trafficking and secretion decreased. (Neznanov et al. 2005)

# "Mcavernosa08323" -- peroxiredoxin

# "Mcavernosa15354" -- vacuolar sorting protein, arrestin domain. protein sorting? signaling?

# "Mcavernosa15521" -- BRO1 domain - vacuoles and lysosomes targeting - interpro
# programmed cell death interacting protein - blast
# Li et al. Sci Adv -- DE in symbiosis establishment/breakdown aiptasia

# "Mcavernosa16961" --  protein-protein interactions

# "Mcavernosa24519" -- cytochrome p450 - downregulated

# "Mcavernosa25318" -- fibrillar collagen-like protein

# "Mcavernosa25626" -- cilia and flagella associated

# "Mcavernosa29636" -- # leucine-rich repeat-containing protein

# "Mcavernosa30546" -- CAAX prenyl protease -- protein degradation?
# Kirk et al. 2018 Mol Ecol

Ordination with only DEGs (DEGs from all contrasts)

vsd2f <- vsd2[, colData(vsd2)$colony %in% c(20, 22, 26) & colData(vsd2)$group %in% c("bc", "bh", "cc", "ch")]

# Subset DEGs as padj < 0.1
# Use vsd2f for the batch(/colony)-corrected normalized counts, or vsdf for the regular normalized counts
DEGs.dds <- vsd2f[ rownames(vsd2f) %in% c(Dctrl.Cctrl.DEgenes, Dheat.Cheat.DEgenes, 
                                      Cheat.Cctrl.DEgenes, Dheat.Dctrl.DEgenes,
                                      Dheat.Cctrl.DEgenes, Cheat.Dctrl.DEgenes), ]

# Subset DEGs as raw p < 0.05
# DEGs.dds <- vsdf[ rownames(vsdf) %in% c(
#    rownames(Dctrl.Cctrl.p05), rownames(Dheat.Cheat.p05),
#    rownames(Cheat.Cctrl.p05), rownames(Dheat.Dctrl.p05)), ]

## Run PCA and return data to visualize with ggplot
#pcaData <- plotPCA(DEGs.dds, intgroup = c( "colony", "group"), returnData = TRUE)
#percentVar <- round(100 * attr(pcaData, "percentVar"))

## Plot PCA
# ggplot(pcaData, aes(x = PC1, y = PC2, color = group.1, shape = colony)) +
#   geom_point(alpha = 0.5) +
#   labs(title =  "PCA of normalized gene expression") +
#   xlab(paste0("PC1: ", percentVar[1], "% variance")) +
#   ylab(paste0("PC2: ", percentVar[2], "% variance"))

## Calculate Euclidean distances among samples
sampleDists <- dist(t(assay(DEGs.dds)), method = "euclidean")
sampleDistMatrix <- as.matrix( sampleDists )

## PCoA
mds <- as.data.frame(colData(DEGs.dds))  %>%
         cbind(cmdscale(sampleDistMatrix, k = 2))

# Create convex hulls to plot around groups
Cc.hull <- mds[mds$group == "cc", ][chull(mds[mds$group == "cc", c("1", "2")]), ]
Dc.hull <- mds[mds$group == "bc", ][chull(mds[mds$group == "bc", c("1", "2")]), ]
Ch.hull <- mds[mds$group == "ch", ][chull(mds[mds$group == "ch", c("1", "2")]), ]
Dh.hull <- mds[mds$group == "bh", ][chull(mds[mds$group == "bh", c("1", "2")]), ]
hull.data <- rbind(Cc.hull, Dc.hull, Ch.hull, Dh.hull)  #combine grp.a and grp.b

# Plot PCoA
ggplot(mds, aes(x = `1`, y = `2`,  color = group)) +
  geom_polygon(data = hull.data, aes(x = `1`, y = `2`, fill = group, group = group), alpha = 0.30) +
  geom_point(alpha = 0.5) + coord_fixed() +
  labs(title = "PCoA of DEGs") +
  scale_color_manual(values = c("darkblue", "cornflowerblue", "darkorange3", "orange")) +
  scale_fill_manual(values = c("darkblue", "cornflowerblue", "darkorange3", "orange"))

# Plot with spiders instead of hulls
scrs <- mds[, c("group", "1", "2")]
cent <- aggregate(cbind(`1`, `2`) ~ group, data = scrs, FUN = mean)
segs <- merge(scrs, setNames(cent, c('group','o1','o2')),
              by = 'group', sort = FALSE)

ggplot(mds, aes(x = `1`, y = `2`,  color = group)) +
  geom_segment(data = segs, mapping = aes(xend = o1, yend = o2)) +
  geom_point(data = cent, size = 5) +
  geom_point(alpha = 0.5) + coord_fixed() +
  labs(title = "PCoA of DEGs") +
  scale_color_manual(values = c("darkblue", "cornflowerblue", "darkorange3", "orange")) +
  scale_fill_manual(values = c("darkblue", "cornflowerblue", "darkorange3", "orange"))

#plot_ly(x=mds$`1`, y=mds$`2`, z=mds$`3`, color=mds$symtrt)


# Plot with ellipses
# ggplot(mds, aes(x = `1`, y = `2`, shape = sym:trt, color = sym:trt)) +
#   geom_point(alpha = 0.5) + coord_fixed() +
#   stat_ellipse(level = 0.50) +
#   labs(title = "PCoA of DEGs")

# First 3 axes all separate but hard to visualize 3d
# Try 2D NMDS
# nmds <- metaMDS(t(assay(DEGs.dds)), distance = "euclidean", k = 2)
# 
# mds <- as.data.frame(colData(DEGs.dds))  %>%
#          cbind(nmds$points)
# 
# ggplot(mds, aes(x = MDS1, y = MDS2, shape = sym:trt, color = sym:trt)) +
#   geom_point(alpha = 0.5) + coord_fixed() +
#   stat_ellipse(level = 0.50) +
#   labs(title = "PCoA of DEGs")

# ## Run PCA and return data to visualize with ggplot
# pcaData <- plotPCA(DEGs.dds, intgroup = c( "colony", "sym", "trt"), returnData = TRUE)
# percentVar <- round(100 * attr(pcaData, "percentVar"))
# 
# ## Plot PCA
# ggplot(pcaData, aes(x = PC1, y = PC2, color = sym:trt)) +
#   geom_point(alpha = 0.5) +
#   labs(title =  "PCA: all genes") +
#   xlab(paste0("PC1: ", percentVar[1], "% variance")) +
#   ylab(paste0("PC2: ", percentVar[2], "% variance"))

Are DEGs associated with symbiont shuffling due to having changed symbionts, or having bleached?

Other coral genotypes were bleached but did not shuffle their symbionts – they had clade C to begin with, and clade C after recovery. We can use these samples as controls to test whether the change in gene expression in corals that shuffle is attributable to the symbiont change or the previous stress.

# Colonies EXCEPT 20, 22, and 26
# Only time point at end of heat stress (but these were all at ambient - no hs)
ns.dds <- dds[, !colData(dds)$colony %in% c(20, 22, 26) & colData(dds)$date_sampled == "2017-10-28"]
# Filter out Mc2840 and Mc1740 -- these colonies had a high proportion of D.
# (everything else had zero D, except 2 other samples had 7-8%)
ns.dds <- ns.dds[, !rownames(colData(ns.dds)) %in% c("Mc28.40")]
# Add design formula for heat stress experiment and DROP UNUSED FACTOR LEVELS!
design(ns.dds) <- formula(~ colony + trt1)
colData(ns.dds) <- droplevels(colData(ns.dds))

# Run DESeq pipeline
ns.dds <- DESeq(ns.dds)

# Find DE genes between prev. bleached vs. control (trt1)
res <- results(ns.dds, contrast = c("trt1", "b", "c"))
summary(res)  ### No differentially expressed genes!
## 
## out of 17099 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 0, 0%
## LFC < 0 (down)     : 0, 0%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

No genes are differentially expressed in corals that bleached and recovered with the same symbionts vs. those that never bleached. This suggests transcriptomes have ‘recovered’ from the initial bleaching treatment, and that the genes that are differentially expressed in colonies that shuffled from C to D are actuially due to the change in symbionts after bleaching, not the bleaching itself.

ns.vsd <- vst(ns.dds, blind = FALSE)
## Calculate Euclidean distances among samples
sampleDists <- dist(t(assay(ns.vsd)), method = "manhattan")
sampleDistMatrix <- as.matrix( sampleDists )

## Plot NMDS
mds <- as.data.frame(colData(ns.vsd)) %>%
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(group = recode(group, bc = "D - control", bh = "D - heated", 
                          cc = "C - control", ch = "C - heated"))

#mdsf <- mds %>% filter(group %in% c("ca", "ch", "ba", "bh"))

ggplot(mds, aes(x = `1`, y = `2`, color = colony, shape = group)) +
    geom_point() +# coord_fixed() +
    scale_shape_manual(values = c(21,24,16,17)) +
    labs(x = "PC1", y = "PC2")# +

    geom_text(label = paste(mds$colony, mds$core))
## geom_text: parse = FALSE, check_overlap = FALSE, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
design(hs.dds) <- formula(~ colony + colony:symtrt)
hs.dds <- DESeq(hs.dds)
resultsNames(hs.dds)

col20 <- results(hs.dds, name = "colony20.symtrtD.ctrl")
col20de <- rownames(col20[which(col20$padj < 0.1), ])
plotCounts(hs.dds, gene = "Mcavernosa00334", intgroup = c("colony", "symtrt"))
summary(col20)

col22 <- results(hs.dds, name = "colony22.symtrtD.ctrl")
col22de <- rownames(col22[which(col22$padj < 0.1), ])
plotCounts(hs.dds, gene = "Mcavernosa00044", intgroup = c("colony", "symtrt"))
summary(col22)

col26 <- results(hs.dds, name = "colony26.symtrtD.ctrl")
col26de <- rownames(col26[which(col26$padj < 0.1), ])
plotCounts(hs.dds, gene = "Mcavernosa10380", intgroup = c("colony", "symtrt"))
summary(col26)

col20de1 <- rownames(counts(hs.dds)) %in% col20de
col22de1 <- rownames(counts(hs.dds)) %in% col22de
col26de1 <- rownames(counts(hs.dds)) %in% col26de
a <- cbind(col20de1, col22de1, col26de1)
vennDiagram(vennCounts(a))

rownames(counts(hs.dds))[which(rowSums(a) == 3)]

Annotations of genes upregulated in D vs. C controls

data.frame(Dctrl.Cctrl.sig) %>%
  mutate(gene = rownames(.)) %>%
  arrange(padj)

#Mcavernosa03879
plotCounts(hs.dds, gene = "Mcavernosa03879", intgroup = c("group"))
# no good annotations for this protein. 

#Mcavernosa23377
plotCounts(hs.dds, gene = "Mcavernosa23377", intgroup = c("group"))
# no good annotations for this protein. 

#Mcavernosa23377
plotCounts(hs.dds, gene = "Mcavernosa24278", intgroup = c("group"))
# no good annotations for this protein. 

#Mcavernosa23377
plotCounts(hs.dds, gene = "Mcavernosa00044", intgroup = c("group"))
# no good annotations for this protein. 

#Mcavernosa23531
plotCounts(hs.dds, gene = "Mcavernosa23531", intgroup = c("group"))
# has a BRICHOS domain, which may have something to do with post-translational modification

#Mcavernosa01063
plotCounts(hs.dds, gene = "Mcavernosa01063", intgroup = c("group"))
#A toxin in nematocysts. Kalicludines and kaliseptine

#Mcavernosa01062
plotCounts(hs.dds, gene = "Mcavernosa01062", intgroup = c("sym", "trt"))
# Transmembrane protein with extracellular Kunitz domain/trypsin inhibitor
# Trypsin is an enzyme that breaks things down as part of digestion... arrested phagosome?

#Mcavernosa00334
plotCounts(hs.dds, gene = "Mcavernosa00334", intgroup = c("sym", "trt"))
# RNA processing?

#Mcavernosa05961
plotCounts(hs.dds, gene = "Mcavernosa05961", intgroup = c("sym", "trt"))
#Enkurin -- could be localized to cilia? Sigg et al.

#Mcavernosa01259
plotCounts(hs.dds, gene = "Mcavernosa01259", intgroup = c("sym", "trt"))
# hsp molecular chaperone DnaJ domain

#Mcavernosa08323
plotCounts(hs.dds, gene = "Mcavernosa08323", intgroup = c("group"))
# Peroxiredoxin - Thioredoxin-like - response to oxidative stress
# See Maor-Landaw and Levy 2016 for analysis of expression in corals

#Mcavernosa32478
plotCounts(hs.dds, gene = "Mcavernosa32478", intgroup = c("group"))
# kintoun - cilia protein?
# kintoun-like protein found to be SNP outlier in corals adapted to hot Palau lagoons (H Rivera dissertation)
# could function as stress response chaperone (von Morgen et al.)

#Mcavernosa05331
plotCounts(hs.dds, gene = "Mcavernosa05331", intgroup = c("sym", "trt"))
#BMP-binding endothelial regulator protein - crossveinless2. lots of different domains on InterPro...

#Mcavernosa10389
plotCounts(hs.dds, gene = "Mcavernosa10389", intgroup = c("sym", "trt"))
#Major family superfamily (MFS transporter) domain containing protein
# Transports small solutes. may transport G3P
# Sproles et al. -- MFS transporters in cnidarian dinoflagellate symbiosis

#Mcavernosa04519
plotCounts(hs.dds, gene = "Mcavernosa04519", intgroup = c("sym", "trt"))
# Copper-transporting ATPase

# Mcavernosa09839
plotCounts(hs.dds, gene = "Mcavernosa09839", intgroup = c("sym", "trt"))
# tyrosine phosphatase

# Mcavernosa03538
plotCounts(hs.dds, gene = "Mcavernosa03538", intgroup = c("sym", "trt"))
# no annotations, signal peptide domain and non-cytoplasmic domain on INterPro

# Mcavernosa10382
plotCounts(hs.dds, gene = "Mcavernosa10382", intgroup = c("sym", "trt"))
# protein prune homolog
# interacts with pro and anti apoptotic proteins (InterPro) 

#Mcavernosa13113
plotCounts(hs.dds, gene = "Mcavernosa13113", intgroup = c("sym", "trt"))
# serine incorporator

Annotations of genes downregulated in D vs. C controls

# Mcavernosa29636
plotCounts(hs.dds, gene = "Mcavernosa29636", intgroup = c("sym", "trt"))
# leucine-rich repeat-containing protein

#Mcavernosa16961
plotCounts(hs.dds, gene = "Mcavernosa16961", intgroup = c("sym", "trt"))
# protein-protein interactions

#Mcavernosa08291
plotCounts(hs.dds, gene = "Mcavernosa08291", intgroup = c("sym", "trt"))
# "serologically defined colon cancer antigen" 
# aka ENTR1 - endosome-associated-trafficking regulator 1
# without it, TNF receptor expression is reduced, resulting in resistance to TNF-induced apoptosis
# and also protein trafficking and secretion decreased. (Neznanov et al. 2005)

#Mcavernosa35232
plotCounts(hs.dds, gene = "Mcavernosa35232", intgroup = c("sym", "trt"))
# no blast hits or annotations
# mobidb disoarder prediction

#Mcavernosa10380
plotCounts(hs.dds, gene = "Mcavernosa10380", intgroup = c("sym", "trt"))
#hsp20-like chaperone, calcyclin-binding, 
# interacts with SIAH, an E3 ubiquitin ligase with role in apoptosis


#Mcavernosa32338
plotCounts(hs.dds, gene = "Mcavernosa32338", intgroup = c("sym", "trt"))
# Ras-related protein Rab33, GTPase activity
# GTPases control assembly of vesicle coats for transport vesicles

#Mcavernosa15521
plotCounts(hs.dds, gene = "Mcavernosa15521", intgroup = c("sym", "trt"))
#BRO1 domain - vacuoles and lysosomes targeting - interpro
# programmed cell death interacting protein - blast
# Li et al. Sci Adv -- DE in symbiosis establishment/breakdown aiptasia

# Mcavernosa30546
plotCounts(hs.dds, gene = "Mcavernosa30546", intgroup = c("sym", "trt"))
# CAAX prenyl protease -- protein degradation?
# Kirk et al. 2018 Mol Ecol

# Mcavernosa13875
plotCounts(hs.dds, gene = "Mcavernosa13875", intgroup = c("sym", "trt"))
# DNA-binding, transcription factor

# Mcavernosa25626
plotCounts(hs.dds, gene = "Mcavernosa25626", intgroup = c("sym", "trt"))

#Mcavernosa15354
plotCounts(hs.dds, gene = "Mcavernosa15354", intgroup = c("sym", "trt"))

# Mcavernosa21783
plotCounts(hs.dds, gene = "Mcavernosa21783", intgroup = c("sym", "trt"))
# peroxisomal membrane protein
# peroxisomes involved in reduction of hydrogen peroxide / oxidative stress